## [1] "/Users/victoriaqc/Documents/GitHub/PFAS_met_cage"

Introduction

Start with Plate Reader Spreadsheet

View in Excel vs. View in R

# Import the Albuwell plate-reader workbook.
test <- read.xlsx(
  here("VQC R work", "Albuwell Data Sheets", "Albuwell_061225.xlsx")
)

# Keep only the plate region of the sheet: rows 9-16, columns 1-9.
subset_df <- test[9:16, seq_len(9)]

The subset of the data looks like this

Next we will rename the columns to "Row" plus 1-8 and pivot the data to long format

# Name the columns: "Row" first, then the plate column numbers 1-8.
colnames(subset_df) <- c("Row", 1:8)

# Convert every reading column to numeric once and reuse the result
# (use as.character instead if text values are preferred).
clean_df <- subset_df %>%
  mutate(across(-Row, as.numeric))

# Preview of the long layout, one row per Row/Column pair
# (printed only, not stored).
clean_df %>%
  pivot_longer(cols = -Row, names_to = "Column", values_to = "Value")

# Tidy table: one row per well, keyed by Cell = Row label pasted to the
# column number.
tidy_df <- clean_df %>%
  pivot_longer(cols = -Row,
               names_to = "Column",
               values_to = "Value") %>%
  mutate(Cell = paste0(Row, Column)) %>%
  select(Cell, Value)

This is what the tidy data looks like

1. Load Sample Info and Merge the data

# Read the sample-information sheet for this plate (one entry per Cell).
df.Sample <- read.xlsx(
  here("VQC R work", "Albuwell Data Sheets", "Long_PFAS_Albuwell_Sample_061225.xlsx")
)

# Attach the sample information to the readings. Every reading is kept;
# wells with no matching Cell in df.Sample get NA in the joined columns.
tidy_data <- left_join(tidy_df, df.Sample, by = "Cell")

a. Graph the Ladder

Graph ladder and calculate the curve fit

# Keep the standard-curve ("Ladder") wells together with the blank wells.
df.Ladder <- tidy_data %>%
  filter(Sample %in% c("Ladder", "Blank"))

# Collapse the blank wells to a single number before subtracting.
# Subtracting the raw blank vector would recycle it down the rows whenever
# the plate has more than one blank well, so neighbouring ladder wells would
# silently be corrected by different blanks.
blank_value <- mean(df.Ladder$Value[df.Ladder$Sample == "Blank"], na.rm = TRUE)

# Blank-correct the ladder readings and drop the blank rows themselves.
df.Ladder2 <- df.Ladder %>%
  mutate(Value2 = ifelse(Sample != "Blank", Value - blank_value, NA)) %>%
  filter(!is.na(Value2))

# Scatter plot of the blank-corrected ladder.
ggplot(df.Ladder2, aes(x = Ladder_Value, y = Value2, color = Sample)) +
  geom_point() +
  theme_minimal() +
  labs(title = "Ladder",
       x = "Ladder value",
       y = "Blank-corrected value",
       color = "Sample")

# Fit y = a + b * log(x) to the ladder (log() is the natural log).
x <- df.Ladder2$Ladder_Value
y <- df.Ladder2$Value2

# The model is fitted with nls(), starting both parameters at 1.
fit <- nls(y ~ a + b * log(x), start = list(a = 1, b = 1))

# Print summary of the fit
summary(fit)
## 
## Formula: y ~ a + b * log(x)
## 
## Parameters:
##   Estimate Std. Error t value Pr(>|t|)    
## a  0.89423    0.02068   43.25 1.52e-14 ***
## b -0.24175    0.01472  -16.42 1.38e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.07638 on 12 degrees of freedom
## 
## Number of iterations to convergence: 1 
## Achieved convergence tolerance: 1.024e-07
# Pull the fitted parameters out of the model. Note the renaming: the model's
# intercept `a` (first coefficient) is stored as `b`, and the model's
# log(x) multiplier `b` (second coefficient) is stored as `m`, so the code
# below reads y = m * ln(x) + b.
coefficients <- coef(fit)
b <- as.numeric(coefficients[1])
m <- as.numeric(coefficients[2])

# Fitted y at each ladder x.
predicted <- predict(fit, newdata = list(x = x))

# R-squared = 1 - (residual sum of squares / total sum of squares)
residuals <- y - predicted
rss <- sum(residuals^2)
tss <- sum((y - mean(y))^2)
r_squared <- 1 - rss/tss

# Observed and fitted values side by side for plotting.
plot_data <- data.frame(x = x, y = y, Predicted = predicted)

# Ladder points with the fitted curve drawn on top; the equation and R²
# go in the title.
p <- ggplot(data = plot_data, mapping = aes(x = x, y = y)) +
  geom_point(size = 5, color = "darkred") +
  geom_line(mapping = aes(y = Predicted), color = "red") +
  theme_classic() +
  labs(
    title = paste0("Scatter plot with Non-linear Logarithmic Fit: y =", round(m, 3), "ln(x) +", round(b, 3), "\nR² =", round(r_squared, 3)),
    x = "Albumin Concentration",
    y = "Absorbance"
  )

# Show the plot
print(p)

Calculate back the ladder values based on the fit equation

# Blank-correct every well on the plate and drop the blank rows.
# The blank wells are averaged to one number first; subtracting the raw
# blank vector would recycle it across rows if there is more than one blank.
blank_value <- mean(df.Ladder$Value[df.Ladder$Sample == "Blank"], na.rm = TRUE)

tidy_data_blank <- tidy_data %>%
    mutate(Value2 = ifelse(Sample != "Blank", Value - blank_value, NA)) %>%
    filter(!is.na(Value2))

# Invert the fit y = m * ln(x) + b to solve for x: x = exp((y - b) / m).
tidy_data_solved <- tidy_data_blank  %>%
  mutate(Solved = exp((Value2 - b) / m))

# Ladder rows only, to check the inversion against the known ladder values.
df_test <- tidy_data_solved %>%
  filter(Sample == "Ladder")

# Known ladder value (x) against the value solved from the fit (y).
ggplot(df_test, aes(x = Ladder_Value, y = Solved, color = as.character(Ladder_Value))) +
  geom_point(size = 5) +
  theme_minimal() +
  labs(title = "Checking Equation Fit Against Ladder",
       x = "Ladder value",
       y = "Solved value",
       color = "Ladder value")

Graph the sample data on the solved curve split by dilution factor

# Rows of the solved table whose Type is "Sample".
df_sample_test <- filter(tidy_data_solved, Type == "Sample")

# Print the sample rows
df_sample_test

# Overlay the samples on the ladder-fit plot `p`, coloured by dilution.
p +
  geom_point(
    data = df_sample_test,
    mapping = aes(x = Solved, y = Value2, color = as.character(Dilution)),
    size = 3
  ) +
  labs(color = "Dilution")  # legend title

Calculate with Dilution Factor

# Scale each solved value by the well's dilution factor.
tidy_data_solved_dil <- tidy_data_solved  %>%
  mutate(Concentration = Solved * Dilution)

# Concentration per sample, coloured by replicate.
# The x axis maps Sample, so it is labelled "Sample".
ggplot(tidy_data_solved_dil, aes(x = Sample, y = Concentration, color = as.character(Replicate))) +
  geom_point(size = 3) +
  theme_minimal() +
  labs(
    title = "Concentration of Albumin by Dilution",
    x = "Sample",
    y = "Concentration (µg/mL)", 
    color = "Replicate"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Add the per-sample mean concentration as a column on every row.
# ungroup() afterwards so the Sample grouping does not silently carry over
# into later steps that reuse this table.
tidy_data_solved_dil2 <- tidy_data_solved_dil %>%
  group_by(Sample) %>%
  mutate(Mean = mean(Concentration, na.rm = TRUE)) %>%
  ungroup()

# Per-sample mean, coloured by dilution.
ggplot(tidy_data_solved_dil2, aes(x = Sample, y = Mean, color = as.character(Dilution))) +
  geom_point(size = 3) +
  theme_minimal() +
  labs(
    title = "Concentration of Albumin by Dilution",
    x = "Sample",
    y = "Mean Albumin Concentration (µg/mL)",  
    color = "Dilution"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

2. Add Sample Metadata

# Read the metadata sheet with no header row and the first column used as
# row names. TRUE/FALSE are spelled out because T and F are ordinary
# variables that can be reassigned.
df.meta <- read.xlsx(here("VQC R work", "Albuwell Data Sheets", "Long_PFAS_Metadata.xlsx"), colNames = FALSE, rowNames = TRUE)

# Transpose so the former row names become the columns.
# t() returns a matrix, so it is converted back to a data frame.
df.meta2 <- df.meta %>%
  t() %>%
  as.data.frame()

# UV is converted back to numeric after the transpose.
df.meta2$UV <- as.numeric(df.meta2$UV)

# Attach the metadata to the albumin results by Sample.
tidy_data_solved_dil_meta <- tidy_data_solved_dil2 %>%
  left_join(df.meta2, by = c("Sample"))

# Rows with a concentration only; Alb = per-sample mean x UV.
tidy_data_solved_dil_meta2 <- tidy_data_solved_dil_meta  %>%
  filter(!is.na(Concentration)) %>%
  mutate(Alb = Mean * UV)

# Albumin summary: mean concentration for each Sample x Dilution pair.
# This has to be built here, because tidy_data_solved_dil2 is reassigned
# a few lines below.
albumin_table <- summarize(
  group_by(tidy_data_solved_dil2, Sample, Dilution),
  Alb_Concentration = mean(Concentration),
  .groups = "drop"
)

# Flag solved values above 5 as "Above"; everything else is "OK".
tidy_data_solved2 <- mutate(
  tidy_data_solved,
  Dilution_calc = ifelse(Solved > 5, "Above", "OK")
)

# Rebuild the dilution-scaled table from the flagged data. This replaces the
# earlier tidy_data_solved_dil2.
tidy_data_solved_dil2 <- mutate(tidy_data_solved2, Concentration = Solved * Dilution)

# Concentration per sample, coloured by dilution factor.
# The x axis maps Sample, so it is labelled "Sample" rather than "Cell".
ggplot(tidy_data_solved_dil2, aes(x = Sample, y = Concentration, color = as.character(Dilution))) +
  geom_point() +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
  labs(title = "Concentration of Albumin by Dilution",
       x = "Sample",
       y = "Concentration",
       color = "Dilution")

# Same plot coloured by the Dilution_calc flag ("Above" / "OK"); the legend
# is titled after the flag instead of "Dilution".
ggplot(tidy_data_solved_dil2, aes(x = Sample, y = Concentration, color = Dilution_calc)) +
  geom_point() +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
  labs(title = "Concentration of Albumin by Dilution",
       x = "Sample",
       y = "Concentration",
       color = "Dilution_calc")

# Count the sample wells in each Dilution x Dilution_calc combination.
# .groups = "drop" returns an ungrouped table and avoids the regrouping
# message, matching how albumin_table is summarised.
table <- tidy_data_solved_dil2 %>%
  filter(Type == "Sample") %>%
  group_by(Dilution, Dilution_calc) %>%
  summarize(count = n(), .groups = "drop")

table

3. Read in Creatinine Values

# Read the first creatinine plate.
df_creatinine <- read.xlsx(here("VQC R work", "Albuwell Data Sheets", "Creatinine061825.xlsx"))

# Plate region of this sheet: rows 19-25, columns 2-10.
subset_pre <- df_creatinine[19:25, 2:10]

# Name the columns: "Row" first, then the plate column numbers 1-8.
colnames(subset_pre) <- c("Row", 1:8)

# Convert every reading column to numeric.
subset_pre <- subset_pre %>%
  mutate(across(-Row, as.numeric))

# One row per well, keyed by Cell = Row label pasted to the column number.
# select(Cell, Value) alone already drops Row and Column.
tidy_df_pre <- subset_pre  %>%
  pivot_longer(cols = -Row, # Pivot all columns except Row
               names_to = "Column", # Name of new column for the old column names
               values_to = "Value") %>% # Name of new column for the values
  mutate(Cell = paste0(Row, Column)) %>% # Create the new Cell column
  select(Cell, Value)

# Print the column names (this only displays them; nothing is changed).
as.character(colnames(subset_pre))
## [1] "Row" "1"   "2"   "3"   "4"   "5"   "6"   "7"   "8"
#set the colnames as characters

# Read the second creatinine plate.
# NOTE(review): this file is dated 052125 while the first plate and the
# sample sheet are dated 061825 — confirm this is the intended file.
df_creatinine_post <- read.xlsx(here("VQC R work", "Albuwell Data Sheets", "Creatinine_Acid052125.xlsx"))

# Plate region of this sheet: rows 9-15, columns 1-9 (a different position
# from the first plate's sheet).
subset_post <- df_creatinine_post[9:15, 1:9]

# Name the columns: "Row" first, then the plate column numbers 1-8.
colnames(subset_post) <- c("Row", 1:8)

# Convert every reading column to numeric.
subset_post <- subset_post %>%
  mutate(across(-Row, as.numeric))

# One row per well, keyed by Cell. select(Cell, Value) alone already drops
# Row and Column.
tidy_df_post <- subset_post  %>%
  pivot_longer(cols = -Row, # Pivot all columns except Row
               names_to = "Column", # Name of new column for the old column names
               values_to = "Value") %>% # Name of new column for the values
  mutate(Cell = paste0(Row, Column)) %>% # Create the new Cell column
  select(Cell, Value)

# Merge the two readings by Cell. merge() suffixes the clashing Value
# columns: Value.x comes from tidy_df_pre, Value.y from tidy_df_post.
# This replaces the albumin tidy_df from earlier in the report.
tidy_df <- merge(tidy_df_pre, tidy_df_post, by = "Cell")

# Per-well difference: first reading minus second reading.
tidy_df <- tidy_df %>%
  mutate(Value = Value.x - Value.y) %>%
  select(Cell, Value)

Load Sample Info and Merge the data

# Read the sample-information sheet for the creatinine plate. This replaces
# the albumin df.Sample.
df.Sample <- read.xlsx(
  here("VQC R work", "Albuwell Data Sheets", "Creatinine_Sample_061825.xlsx")
)

# Attach the sample information to the per-well differences. This replaces
# the albumin tidy_data.
tidy_data <- left_join(tidy_df, df.Sample, by = "Cell")

a. Graph the Ladder

Graph ladder and calculate the curve fit for creatinine

# Keep only the standard-curve ("Ladder") wells.
df.Ladder_Cre <- filter(tidy_data, Sample == "Ladder")

# Scatter plot of the creatinine ladder.
ggplot(data = df.Ladder_Cre,
       mapping = aes(x = Ladder_Value, y = Value, color = Sample)) +
  geom_point() +
  labs(title = "Ladder",
       x = "Cell",
       y = "Value",
       color = "Sample") +
  theme_minimal()

# Ladder x / y vectors, reused when building the plotting data.
y <- df.Ladder_Cre$Value
x <- df.Ladder_Cre$Ladder_Value

# Straight-line fit of Value on Ladder_Value.
fit <- lm(formula = Value ~ Ladder_Value, data = df.Ladder_Cre)

# Print summary of the linear regression
summary(fit)
## 
## Call:
## lm(formula = Value ~ Ladder_Value, data = df.Ladder_Cre)
## 
## Residuals:
##         1         2         3         4         5         6 
##  0.002612 -0.002388  0.005496 -0.006504  0.002892 -0.002108 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.0004104  0.0031685    0.13    0.903    
## Ladder_Value 0.0556978  0.0005233  106.44 4.67e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.004946 on 4 degrees of freedom
## Multiple R-squared:  0.9996, Adjusted R-squared:  0.9996 
## F-statistic: 1.133e+04 on 1 and 4 DF,  p-value: 4.671e-08
# Slope (m) and intercept (b) of the linear fit, stored as plain numbers
# in the same way as the albumin section.
coefficients <- coef(fit)
m <- as.numeric(coefficients["Ladder_Value"])
b <- as.numeric(coefficients["(Intercept)"])

# Observed ladder points with the fitted value at each ladder x.
plot_data <- data.frame(x = x, y = y, Predicted = predict(fit, newdata = data.frame(Ladder_Value = x)))

# Ladder points with the fitted line.
# The fit is a straight line from lm(), so the title describes y = m * x + b
# instead of the logarithmic form used for albumin. The extra default
# geom_point() layer was dropped: it drew the same points underneath the
# larger dark-red ones.
p <- ggplot(plot_data, aes(x = x, y = y)) +
  geom_line(aes(y = Predicted), color = "red") +  # Fitted line
  labs(
    title = paste0("Scatter plot with Linear Fit: y = ", round(m, 3), "x + ", round(b, 3)),
    x = "Creatinine Concentration",
    y = "Absorbance"
  ) +
  theme_classic() +
  geom_point(color = "darkred", size = 5)

p

b. Calculate the values from the samples

# Creatinine values are used exactly as merged above (first reading minus
# second reading); no blank is subtracted. Wells without a value are dropped.
# A blank-subtraction step that used to sit here was removed: it took its
# blank from df.Ladder, which is the albumin ladder, and its result was
# overwritten immediately.
tidy_data_blank <- tidy_data %>%
    filter(!is.na(Value))

# Invert the linear fit Value = m * x + b to solve for x.
tidy_data_solved <- tidy_data_blank  %>%
  mutate(Solved = ((Value - b) / m))

# Ladder rows only, to check the inversion against the known ladder values.
df_test <- tidy_data_solved %>%
  filter(Sample == "Ladder")

# Known ladder value (x) against the value solved from the fit (y).
ggplot(df_test, aes(x = Ladder_Value, y = Solved, color = as.character(Ladder_Value))) +
  geom_point() +
  theme_minimal() +
  labs(title = "Checking Equation Fit Against Ladder",
       x = "Ladder value",
       y = "Solved value",
       color = "Ladder value")

# Keep only real samples: drop Blank, Ladder and Max wells.
# The previous condition joined the three `!=` tests with `|`, which is TRUE
# for every non-missing Sample, so nothing was excluded. Rows with a missing
# Sample were dropped before and still are.
df_sample_test <- tidy_data_solved %>%
  filter(!is.na(Sample), !Sample %in% c("Blank", "Ladder", "Max"))

df_sample_test

# Overlay the samples on the ladder-fit plot, coloured by dilution. The
# colour is mapped inside aes() so it gets a legend, as in the albumin plot.
p + geom_point(data = df_sample_test, aes(x = Solved, y = Value, color = as.character(Dilution)), size = 3) +
  labs(color = "Dilution")

# Flag solved values below 1 as "Below"; everything else is "OK".
df_sample_test <- df_sample_test %>%
  mutate(Dilution_calc = ifelse(Solved < 1, "Below", "OK"))

c. Calculate with Dilution Factor

# Flag solved values below 1 as "Below"; everything else is "OK".
tidy_data_solved <- tidy_data_solved %>%
  mutate(Dilution_calc = ifelse(Solved < 1, "Below", "OK"))

# Scale by the dilution factor and drop rows where that gives NA.
tidy_data_solved_dil3 <- tidy_data_solved  %>%
  mutate(Concentration = Solved * Dilution) %>%
  filter(!is.na(Concentration))

# Concentration per sample, coloured by dilution factor.
# The x axis maps Sample, so it is labelled "Sample" rather than "Cell".
ggplot(tidy_data_solved_dil3, aes(x = Sample, y = Concentration, color = as.character(Dilution))) +
  geom_point() +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
  labs(title = "Concentration of Creatinine by Dilution",
       x = "Sample",
       y = "Concentration",
       color = "Dilution")

# Same plot coloured by the Dilution_calc flag ("Below" / "OK"); the legend
# is titled after the flag instead of "Dilution".
ggplot(tidy_data_solved_dil3, aes(x = Sample, y = Concentration, color = Dilution_calc)) +
  geom_point() +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
  labs(title = "Concentration of Creatinine by Dilution",
       x = "Sample",
       y = "Concentration",
       color = "Dilution_calc")

# Count the wells in each Dilution x Dilution_calc combination.
# .groups = "drop" returns an ungrouped table and avoids the regrouping
# message.
table <- tidy_data_solved_dil3 %>%
  group_by(Dilution, Dilution_calc) %>%
  summarize(count = n(), .groups = "drop")

table

# Creatinine summary: mean concentration per Sample, using only the wells
# at dilution 5. Dilution is multiplied as a number above, so it is compared
# with the number 5 instead of the string "5".
creatinine_table <- tidy_data_solved_dil3 %>%
  filter(Dilution == 5) %>% 
  group_by(Sample) %>%
  summarize(Cre_Concentration = mean(Concentration))
# Urine amount expressed in dL.
# NOTE(review): assumes one unit of UV corresponds to 0.01 dL (e.g. grams
# treated as mL) — confirm against the metadata sheet.
tidy_data_solved_dil_meta$Urine_dL <- tidy_data_solved_dil_meta$UV * 0.01

# Creatinine mass = urine volume x creatinine concentration.
# tidy_data_solved_dil_meta was built from the albumin results, so its own
# Concentration column is albumin. The creatinine concentration is looked up
# by Sample in creatinine_table instead; samples with no entry there get NA.
# NOTE(review): confirm creatinine_table (dilution 5 only) is the intended
# source.
cre_concentration <- creatinine_table$Cre_Concentration[
  match(tidy_data_solved_dil_meta$Sample, creatinine_table$Sample)
]
tidy_data_solved_dil_meta$Creatinine_Mass <-
  tidy_data_solved_dil_meta$Urine_dL * cre_concentration

# Keep only rows that have a Treatment.
tidy_data_solved_dil_meta <- tidy_data_solved_dil_meta[!is.na(tidy_data_solved_dil_meta$Treatment), ]

# Fix the display order of the treatment groups.
tidy_data_solved_dil_meta$Treatment <- factor(tidy_data_solved_dil_meta$Treatment,
  levels = c("Vehicle", "1 mg", "10 mg")
)

# Fix the display order of the timepoint/treatment combinations. Any value
# not listed in `levels` becomes NA.
tidy_data_solved_dil_meta$`Timepoint Treatment` <- factor(tidy_data_solved_dil_meta$`Timepoint Treatment`,
  levels = c("Baseline Vehicle", "Final Vehicle", "Baseline 1mg", "Final 1mg", "Baseline 10mg", "Final 10mg")
)

# Dot plot for Creatinine Mass. Colour maps Timepoint, so the legend is
# titled "Timepoint".
# NOTE(review): the y label gives g/dL, which is a concentration unit, while
# the plotted quantity is volume x concentration — confirm the unit.
ggplot(tidy_data_solved_dil_meta, aes(x = `Timepoint Treatment`, y = Creatinine_Mass, color = Timepoint)) +
  geom_point(size = 3) +
  labs(title = "Creatinine Mass at Baseline and Endpoint by Treatment Group",
       x = "Treatment Group",
       y = "Creatinine Mass (g/dL)",
       color = "Timepoint")

4. Calculate ACR

# Re-read the metadata sheet (no header row, first column as row names).
# TRUE/FALSE are spelled out because T and F can be reassigned.
df.meta <- read.xlsx(here("VQC R work", "Albuwell Data Sheets", "Long_PFAS_Metadata.xlsx"), colNames = FALSE, rowNames = TRUE)

# Combine the creatinine and albumin summaries by Sample.
# albumin_table has one row per Sample x Dilution, so a sample measured at
# several dilutions appears on several rows here.
df.ACR <- merge(creatinine_table, albumin_table, by = "Sample")

# Unit conversions and the albumin-to-creatinine ratio (ACR).
#   Alb_mg_dL: albumin / 10   (µg/mL -> mg/dL; the albumin plots label
#                              concentration as µg/mL)
#   Cre_g_dL:  creatinine / 1000
#              NOTE(review): assumes Cre_Concentration is in mg/dL — confirm.
#   ACR_mg_g:  Alb_mg_dL / Cre_g_dL
df.ACR <- df.ACR %>%
  mutate(
    Alb_mg_dL = Alb_Concentration / 10,
    Cre_g_dL = Cre_Concentration / 1000,
    ACR_mg_g = Alb_mg_dL / Cre_g_dL
  )

df.ACR

a. Graph of ACR by group

# Attach the ACR results to the per-well metadata table by Sample.
ACR_meta <- left_join(tidy_data_solved_dil_meta, df.ACR, by = "Sample")

# df.ACR has no `Timepoint Treatment` column, so this prints NULL.
levels(df.ACR$'Timepoint Treatment')
## NULL
# Inspect the factor levels of `Timepoint Treatment` in ACR_meta.
levels(ACR_meta$'Timepoint Treatment')
## [1] "Baseline Vehicle" "Final Vehicle"    "Baseline 1mg"     "Final 1mg"       
## [5] "Baseline 10mg"    "Final 10mg"
# Set the display order of the timepoint/treatment groups. factor() with
# explicit levels gives the same result with or without a prior as.factor().
timepoint_levels <- c("Baseline Vehicle", "Final Vehicle", "Baseline 1mg", "Final 1mg", "Baseline 10mg", "Final 10mg")
ACR_meta$`Timepoint Treatment` <- factor(
  as.factor(ACR_meta$`Timepoint Treatment`),
  levels = timepoint_levels
)

# ACR per timepoint/treatment group, coloured by treatment.
ggplot(data = ACR_meta,
       mapping = aes(x = `Timepoint Treatment`, y = ACR_mg_g, color = as.character(Treatment))) +
  geom_point(size = 3) +
  labs(title = "ACR by Treatment",
       x = "Timepoint Treatment",
       y = "ACR",
       color = "Treatment") +
  theme_minimal()

5. Load Sample Info and Merge the data (Week)

# Import the weekly Albuwell plate-reader workbook.
test_week <- read.xlsx(here("VQC R work", "Albuwell Data Sheets", "Albuwell_Week_070325.xlsx"))

# Plate region of the sheet: rows 9-16, columns 1-12.
subset_df_week <- test_week[9:16, 1:12]

# Name the columns: "Row" first, then the plate column numbers 1-11.
colnames(subset_df_week) <- c("Row", 1:11)

# Convert every reading column to numeric once and reuse the result
# (use as.character instead if text values are preferred).
clean_df_week <- subset_df_week %>%
  mutate(across(-Row, as.numeric))

# Preview of the long layout (printed only, not stored).
clean_df_week %>%
  pivot_longer(cols = -Row, names_to = "Column", values_to = "Value")

# Tidy table: one row per well, keyed by Cell = Row label pasted to the
# column number.
tidy_df_week <- clean_df_week %>%
  pivot_longer(cols = -Row,
               names_to = "Column",
               values_to = "Value") %>%
  mutate(Cell = paste0(Row, Column)) %>%
  select(Cell, Value)

# Read the sample-information sheet for the weekly plate and attach it to
# the readings by Cell.
df.Sample_week <- read.xlsx(here("VQC R work", "Albuwell Data Sheets", "Long_PFAS_Albuwell_Sample_Week_070325.xlsx"))

tidy_data_week <- tidy_df_week %>%
  left_join(df.Sample_week, by = c("Cell" = "Cell")) 

a. Graph the Ladder

Graph ladder and calculate the curve fit

# Keep the standard-curve ("Ladder") wells together with the blank wells.
df.Ladder_week <- tidy_data_week %>%
  filter(Sample %in% c("Ladder", "Blank"))

# Blank for THIS plate. The blank used to be read from df.Ladder, which is
# the ladder of the earlier (non-weekly) albumin plate, so the weekly
# readings were corrected with another plate's blank. The blank wells are
# also averaged to one number so the subtraction cannot recycle across rows
# when there is more than one blank well.
blank_value_week <- mean(
  df.Ladder_week$Value[df.Ladder_week$Sample == "Blank"],
  na.rm = TRUE
)

# Blank-correct the ladder readings and drop the blank rows themselves.
df.Ladder2_week <- df.Ladder_week %>%
    mutate(Value2 = ifelse(Sample != "Blank", Value - blank_value_week, NA)) %>%
    filter(!is.na(Value2))

# Scatter plot of the blank-corrected weekly ladder.
ggplot(df.Ladder2_week, aes(x = Ladder_Value, y = Value2, color = Sample)) +
  geom_point() +
  theme_minimal() +
  labs(title = "Ladder",
       x = "Ladder value",
       y = "Blank-corrected value",
       color = "Sample")

# Fit y = a + b * log(x) to the ladder (log() is the natural log).
x <- df.Ladder2_week$Ladder_Value
y <- df.Ladder2_week$Value2

# The model is fitted with nls(), starting both parameters at 1.
fit <- nls(y ~ a + b * log(x), start = list(a = 1, b = 1))

# Print summary of the fit
summary(fit)
## 
## Formula: y ~ a + b * log(x)
## 
## Parameters:
##   Estimate Std. Error t value Pr(>|t|)    
## a  1.02643    0.02426    42.3 1.98e-14 ***
## b -0.25579    0.01728   -14.8 4.52e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08964 on 12 degrees of freedom
## 
## Number of iterations to convergence: 1 
## Achieved convergence tolerance: 9.19e-08
# Pull the fitted parameters out of the model. Note the renaming: the model's
# intercept `a` (first coefficient) is stored as `b`, and the model's
# log(x) multiplier `b` (second coefficient) is stored as `m`, so the code
# below reads y = m * ln(x) + b.
coefficients <- coef(fit)
b <- as.numeric(coefficients[1])
m <- as.numeric(coefficients[2])

# Fitted y at each ladder x.
predicted <- predict(fit, newdata = list(x = x))

# R-squared = 1 - (residual sum of squares / total sum of squares)
residuals <- y - predicted
rss <- sum(residuals^2)
tss <- sum((y - mean(y))^2)
r_squared <- 1 - rss/tss

# Observed and fitted values side by side for plotting.
plot_data_week <- data.frame(x = x, y = y, Predicted = predicted)

# Ladder points with the fitted curve drawn on top; the equation and R²
# go in the title.
p <- ggplot(data = plot_data_week, mapping = aes(x = x, y = y)) +
  geom_point(size = 5, color = "darkred") +
  geom_line(mapping = aes(y = Predicted), color = "red") +
  theme_classic() +
  labs(
    title = paste0("Scatter plot with Non-linear Logarithmic Fit: y =", round(m, 3), "ln(x) +", round(b, 3), "\nR² =", round(r_squared, 3)),
    x = "Albumin Concentration",
    y = "Absorbance"
  )

# Show the plot
print(p)

Calculate back the ladder values based on the fit equation

# Blank-correct every well on the weekly plate and drop the blank rows.
# The blank comes from this plate's own ladder table (df.Ladder_week); it
# used to be read from df.Ladder, which belongs to the earlier plate. The
# blank wells are averaged to one number so the subtraction cannot recycle
# across rows.
blank_value_week <- mean(
  df.Ladder_week$Value[df.Ladder_week$Sample == "Blank"],
  na.rm = TRUE
)

tidy_data_blank_week <- tidy_data_week %>%
    mutate(Value2 = ifelse(Sample != "Blank", Value - blank_value_week, NA)) %>%
    filter(!is.na(Value2))

# Invert the fit y = m * ln(x) + b to solve for x: x = exp((y - b) / m).
tidy_data_solved_week <- tidy_data_blank_week  %>%
  mutate(Solved = exp((Value2 - b) / m))

# Ladder rows only, to check the inversion against the known ladder values.
df_test_week <- tidy_data_solved_week %>%
  filter(Sample == "Ladder")

# Known ladder value (x) against the value solved from the fit (y).
ggplot(df_test_week, aes(x = Ladder_Value, y = Solved, color = as.character(Ladder_Value))) +
  geom_point(size = 5) +
  theme_minimal() +
  labs(title = "Checking Equation Fit Against Ladder",
       x = "Ladder value",
       y = "Solved value",
       color = "Ladder value")

Graph the sample data on the solved curve split by dilution factor

# Rows of the weekly solved table whose Type is "Sample".
df_sample_test_week <- filter(tidy_data_solved_week, Type == "Sample")

# Print the sample rows
df_sample_test_week

# Overlay the samples on the ladder-fit plot `p`, coloured by dilution.
p +
  geom_point(
    data = df_sample_test_week,
    mapping = aes(x = Solved, y = Value2, color = as.character(Dilution)),
    size = 3
  ) +
  labs(color = "Dilution")  # legend title

Calculate with Dilution Factor

# Scale each solved value by the well's dilution factor.
tidy_data_solved_dil_week <- tidy_data_solved_week  %>%
  mutate(Concentration = Solved * Dilution)

# Concentration per sample, coloured by replicate.
# The x axis maps Sample, so it is labelled "Sample".
ggplot(tidy_data_solved_dil_week, aes(x = Sample, y = Concentration, color = as.character(Replicate))) +
  geom_point(size = 3) +
  theme_minimal() +
  labs(
    title = "Concentration of Albumin by Dilution",
    x = "Sample",
    y = "Concentration (µg/mL)", 
    color = "Replicate"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Add the per-sample mean concentration as a column on every row.
# ungroup() afterwards so the Sample grouping does not silently carry over
# into later steps that reuse this table.
tidy_data_solved_dil2_week <- tidy_data_solved_dil_week %>%
  group_by(Sample) %>%
  mutate(Mean = mean(Concentration, na.rm = TRUE)) %>%
  ungroup()

# Per-sample mean, coloured by dilution.
ggplot(tidy_data_solved_dil2_week, aes(x = Sample, y = Mean, color = as.character(Dilution))) +
  geom_point(size = 3) +
  theme_minimal() +
  labs(
    title = "Concentration of Albumin by Dilution",
    x = "Sample",
    y = "Mean Albumin Concentration (µg/mL)",  
    color = "Dilution"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

6. Add Sample Metadata (Week)

# Read the weekly metadata sheet with no header row and the first column
# used as row names. TRUE/FALSE are spelled out because T and F are ordinary
# variables that can be reassigned.
df.meta_week <- read.xlsx(here("VQC R work", "Albuwell Data Sheets", "Long_PFAS_Metadata_Week.xlsx"), colNames = FALSE, rowNames = TRUE)

# Transpose so the former row names become the columns.
# t() returns a matrix, so it is converted back to a data frame.
df.meta2_week <- df.meta_week %>%
  t() %>%
  as.data.frame()

# UV is converted back to numeric after the transpose.
df.meta2_week$UV <- as.numeric(df.meta2_week$UV)

# Attach the metadata to the weekly albumin results by Sample.
tidy_data_solved_dil_meta_week <- tidy_data_solved_dil2_week %>%
  left_join(df.meta2_week, by = c("Sample"))

# Rows with a concentration only; Alb = per-sample mean x UV.
tidy_data_solved_dil_meta2_week <- tidy_data_solved_dil_meta_week  %>%
  filter(!is.na(Concentration)) %>%
  mutate(Alb = Mean * UV)

# Albumin summary: mean concentration for each Sample x Dilution pair.
albumin_table_week <- tidy_data_solved_dil2_week %>%
  group_by(Sample, Dilution) %>%   # group by both Sample and Dilution
  summarize(Alb_Concentration = mean(Concentration), .groups = "drop")

7. Read Creatinine (By Week)

# Read the first weekly creatinine plate.
df_creatinine_week <- read.xlsx(here("VQC R work", "Albuwell Data Sheets", "Creatinine_Week_070325.xlsx"))

# Plate region of the sheet: rows 9-16, columns 1-12.
subset_pre_week <- df_creatinine_week[9:16, 1:12]

# Name the columns: "Row" first, then the plate column numbers 1-11.
colnames(subset_pre_week) <- c("Row", 1:11)

# Convert every reading column to numeric.
subset_pre_week <- subset_pre_week %>%
  mutate(across(-Row, as.numeric))

# One row per well, keyed by Cell = Row label pasted to the column number.
# select(Cell, Value) alone already drops Row and Column.
tidy_df_pre_week <- subset_pre_week  %>%
  pivot_longer(cols = -Row, # Pivot all columns except Row
               names_to = "Column", # Name of new column for the old column names
               values_to = "Value") %>% # Name of new column for the values
  mutate(Cell = paste0(Row, Column)) %>% # Create the new Cell column
  select(Cell, Value)

# Print the column names (this only displays them; nothing is changed).
as.character(colnames(subset_pre_week))
##  [1] "Row" "1"   "2"   "3"   "4"   "5"   "6"   "7"   "8"   "9"   "10"  "11"
#set the colnames as characters

# Read the second weekly creatinine plate.
df_creatinine_post_week <- read.xlsx(here("VQC R work", "Albuwell Data Sheets", "Creatinine_Acid_Week_070325.xlsx"))

# Plate region of the sheet: rows 9-16, columns 1-12.
subset_post_week <- df_creatinine_post_week[9:16, 1:12]

# Name the columns: "Row" first, then the plate column numbers 1-11.
colnames(subset_post_week) <- c("Row", 1:11)

# Convert every reading column to numeric.
subset_post_week <- subset_post_week %>%
  mutate(across(-Row, as.numeric))

# One row per well, keyed by Cell. select(Cell, Value) alone already drops
# Row and Column.
tidy_df_post_week <- subset_post_week  %>%
  pivot_longer(cols = -Row, # Pivot all columns except Row
               names_to = "Column", # Name of new column for the old column names
               values_to = "Value") %>% # Name of new column for the values
  mutate(Cell = paste0(Row, Column)) %>% # Create the new Cell column
  select(Cell, Value)

# Merge the two readings by Cell. merge() suffixes the clashing Value
# columns: Value.x comes from tidy_df_pre_week, Value.y from
# tidy_df_post_week. This replaces the weekly albumin tidy_df_week.
tidy_df_week <- merge(tidy_df_pre_week, tidy_df_post_week, by = "Cell")

# Per-well difference: first reading minus second reading.
tidy_df_week <- tidy_df_week %>%
  mutate(Value = Value.x - Value.y) %>%
  select(Cell, Value)

Load Sample Info and Merge the data

# Read the sample-information sheet for the weekly creatinine plate. This
# replaces the weekly albumin df.Sample_week.
df.Sample_week <- read.xlsx(
  here("VQC R work", "Albuwell Data Sheets", "Creatinine_Sample_Week_070325.xlsx")
)

# Attach the sample information to the per-well differences. This replaces
# the weekly albumin tidy_data_week.
tidy_data_week <- left_join(tidy_df_week, df.Sample_week, by = "Cell")

a. Graph the Ladder

Graph ladder and calculate the curve fit for creatinine

# Keep only the standard-curve ("Ladder") wells.
df.Ladder_Cre_week <- filter(tidy_data_week, Sample == "Ladder")

# Scatter plot of the weekly creatinine ladder.
ggplot(data = df.Ladder_Cre_week,
       mapping = aes(x = Ladder_Value, y = Value, color = Sample)) +
  geom_point() +
  labs(title = "Ladder",
       x = "Cell",
       y = "Value",
       color = "Sample") +
  theme_minimal()

# Ladder x / y vectors, reused when building the plotting data.
y <- df.Ladder_Cre_week$Value
x <- df.Ladder_Cre_week$Ladder_Value

# Straight-line fit of Value on Ladder_Value.
fit <- lm(formula = Value ~ Ladder_Value, data = df.Ladder_Cre_week)

# Print summary of the linear regression
summary(fit)
## 
## Call:
## lm(formula = Value ~ Ladder_Value, data = df.Ladder_Cre_week)
## 
## Residuals:
##         1         2         3         4         5         6 
## -0.027993  0.026007  0.006966  0.001966 -0.004474 -0.002474 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.019194   0.012565   1.528    0.201    
## Ladder_Value 0.045280   0.002075  21.822 2.61e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01961 on 4 degrees of freedom
## Multiple R-squared:  0.9917, Adjusted R-squared:  0.9896 
## F-statistic: 476.2 on 1 and 4 DF,  p-value: 2.609e-05
# Slope (m) and intercept (b) of the linear fit, stored as plain numbers
# in the same way as the albumin section.
coefficients <- coef(fit)
m <- as.numeric(coefficients["Ladder_Value"])
b <- as.numeric(coefficients["(Intercept)"])

# Observed ladder points with the fitted value at each ladder x.
plot_data_week <- data.frame(x = x, y = y, Predicted = predict(fit, newdata = data.frame(Ladder_Value = x)))

# Ladder points with the fitted line.
# The fit is a straight line from lm(), so the title describes y = m * x + b
# instead of the logarithmic form used for albumin. The extra default
# geom_point() layer was dropped: it drew the same points underneath the
# larger dark-red ones.
p <- ggplot(plot_data_week, aes(x = x, y = y)) +
  geom_line(aes(y = Predicted), color = "red") +  # Fitted line
  labs(
    title = paste0("Scatter plot with Linear Fit: y = ", round(m, 3), "x + ", round(b, 3)),
    x = "Creatinine Concentration",
    y = "Absorbance"
  ) +
  theme_classic() +
  geom_point(color = "darkred", size = 5)

p

b. Calculate the values from the samples

# Creatinine values are used exactly as merged above (first reading minus
# second reading); no blank is subtracted. Wells without a value are dropped.
# A blank-subtraction step that used to sit here was removed: it took its
# blank from df.Ladder, which is the ladder of the non-weekly albumin plate,
# and its result was overwritten immediately.
tidy_data_blank_week <- tidy_data_week %>%
    filter(!is.na(Value))

# Invert the linear fit Value = m * x + b to solve for x.
tidy_data_solved_week <- tidy_data_blank_week  %>%
  mutate(Solved = ((Value - b) / m))

# Ladder rows only, to check the inversion against the known ladder values.
df_test_week <- tidy_data_solved_week %>%
  filter(Sample == "Ladder")

# Known ladder value (x) against the value solved from the fit (y).
ggplot(df_test_week, aes(x = Ladder_Value, y = Solved, color = as.character(Ladder_Value))) +
  geom_point() +
  theme_minimal() +
  labs(title = "Checking Equation Fit Against Ladder",
       x = "Ladder value",
       y = "Solved value",
       color = "Ladder value")

# Keep only real samples: drop Blank, Ladder and Max wells.
# The previous condition joined the three `!=` tests with `|`, which is TRUE
# for every non-missing Sample, so nothing was excluded. Rows with a missing
# Sample were dropped before and still are.
df_sample_test_week <- tidy_data_solved_week %>%
  filter(!is.na(Sample), !Sample %in% c("Blank", "Ladder", "Max"))

df_sample_test_week

# Overlay the samples on the ladder-fit plot, coloured by dilution. The
# colour is mapped inside aes() so it gets a legend, as in the albumin plot.
p + geom_point(data = df_sample_test_week, aes(x = Solved, y = Value, color = as.character(Dilution)), size = 3) +
  labs(color = "Dilution")

# Flag solved values below 1 as "Below"; everything else is "OK".
df_sample_test_week <- df_sample_test_week %>%
  mutate(Dilution_calc = ifelse(Solved < 1, "Below", "OK"))

c. Calculate with Dilution Factor

# Flag solved values below 1 as "Below"; everything else is "OK".
tidy_data_solved_week <- tidy_data_solved_week %>%
  mutate(Dilution_calc = ifelse(Solved < 1, "Below", "OK"))

# Scale by the dilution factor and drop rows where that gives NA.
tidy_data_solved_dil3_week <- tidy_data_solved_week  %>%
  mutate(Concentration = Solved * Dilution) %>%
  filter(!is.na(Concentration))

# Concentration per sample, coloured by dilution factor.
# The x axis maps Sample, so it is labelled "Sample" rather than "Cell".
ggplot(tidy_data_solved_dil3_week, aes(x = Sample, y = Concentration, color = as.character(Dilution))) +
  geom_point() +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
  labs(title = "Concentration of Creatinine by Dilution",
       x = "Sample",
       y = "Concentration",
       color = "Dilution")

# Same plot coloured by the Dilution_calc flag ("Below" / "OK"); the legend
# is titled after the flag instead of "Dilution".
ggplot(tidy_data_solved_dil3_week, aes(x = Sample, y = Concentration, color = Dilution_calc)) +
  geom_point() +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
  labs(title = "Concentration of Creatinine by Dilution",
       x = "Sample",
       y = "Concentration",
       color = "Dilution_calc")

# Count the wells in each Dilution x Dilution_calc combination.
# .groups = "drop" returns an ungrouped table and avoids the regrouping
# message.
table_week <- tidy_data_solved_dil3_week %>%
  group_by(Dilution, Dilution_calc) %>%
  summarize(count = n(), .groups = "drop")

table_week

# Creatinine summary: mean concentration per Sample, using only the wells
# at dilution 5. Dilution is multiplied as a number above, so it is compared
# with the number 5 instead of the string "5".
creatinine_table_week <- tidy_data_solved_dil3_week %>%
  filter(Dilution == 5) %>% 
  group_by(Sample) %>%
  summarize(Cre_Concentration = mean(Concentration))

8. Calculate ACR (Week)

# Re-read the weekly metadata sheet (no header row, first column as row
# names). TRUE/FALSE are spelled out because T and F can be reassigned.
df.meta_week <- read.xlsx(here("VQC R work", "Albuwell Data Sheets", "Long_PFAS_Metadata_Week.xlsx"), colNames = FALSE, rowNames = TRUE)

# Combine the creatinine and albumin summaries by Sample.
# albumin_table_week has one row per Sample x Dilution, so a sample measured
# at several dilutions appears on several rows here.
df.ACR_week <- merge(creatinine_table_week, albumin_table_week, by = "Sample")

# Unit conversions and the albumin-to-creatinine ratio (ACR).
#   Alb_mg_dL: albumin / 10   (µg/mL -> mg/dL; the albumin plots label
#                              concentration as µg/mL)
#   Cre_g_dL:  creatinine / 1000
#              NOTE(review): assumes Cre_Concentration is in mg/dL — confirm.
#   ACR_mg_g:  Alb_mg_dL / Cre_g_dL
df.ACR_week <- df.ACR_week %>%
  mutate(
    Alb_mg_dL = Alb_Concentration / 10,
    Cre_g_dL = Cre_Concentration / 1000,
    ACR_mg_g = Alb_mg_dL / Cre_g_dL
  )

df.ACR_week

a. Graph of ACR by group

# Attach the weekly ACR results to the per-well metadata table by Sample.
ACR_meta_week <- left_join(tidy_data_solved_dil_meta_week, df.ACR_week, by = "Sample")

# df.ACR_week has no `Timepoint Treatment` column, so this prints NULL.
levels(df.ACR_week$'Timepoint Treatment')
## NULL
# Inspect `Timepoint Treatment` in ACR_meta_week; levels() returns NULL
# when the column is not a factor.
levels(ACR_meta_week$'Timepoint Treatment')
## NULL
# Turn `Timepoint Treatment` into a factor with an explicit display order.
# factor() with `levels` does the conversion on its own, so the separate
# as.factor() call was dropped. Any value not listed in `levels` becomes NA.
ACR_meta_week$`Timepoint Treatment` <- factor(ACR_meta_week$`Timepoint Treatment`,
  levels = c("Week 0 1 mg", "Week 1 1 mg", "Week 2 1 mg", "Week 3 1 mg", "Week 4 1 mg", "Week 0 10 mg", "Week 1 10 mg", "Week 2 10 mg", "Week 3 10 mg")
)

# ACR per timepoint/treatment group, coloured by treatment.
# The `+` after theme() was missing, so labs() was evaluated as a separate
# statement: its value was printed to the console and the title and axis
# labels were never added to the plot.
ggplot(ACR_meta_week, aes(x = `Timepoint Treatment`, y = ACR_mg_g, color = Treatment)) +
  geom_point(size = 3) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(title = "ACR by Treatment",
       x = "Timepoint Treatment",
       y = "ACR",
       color = "Treatment")
## $x
## [1] "Timepoint Treatment"
## 
## $y
## [1] "ACR"
## 
## $colour
## [1] "Treatment"
## 
## $title
## [1] "ACR by Treatment"
## 
## attr(,"class")
## [1] "labels"

Session Info

# Print the R version, platform and attached/loaded packages for this run.
sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: aarch64-apple-darwin20
## Running under: macOS 15.0
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/Los_Angeles
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] tidyr_1.3.1        ggplotify_0.1.2    pheatmap_1.0.12    openxlsx_4.2.7.1  
##  [5] dichromat_2.0-0.1  here_1.0.1         ggrepel_0.9.6      RColorBrewer_1.1-3
##  [9] ggpmisc_0.6.1      ggpp_0.5.8-1       tibble_3.2.1       ggplot2_3.5.1     
## [13] knitr_1.49         patchwork_1.3.0    dplyr_1.1.4       
## 
## loaded via a namespace (and not attached):
##  [1] yulab.utils_0.1.8  sass_0.4.9         utf8_1.2.4         generics_0.1.3    
##  [5] stringi_1.8.4      lattice_0.22-6     digest_0.6.37      magrittr_2.0.3    
##  [9] evaluate_1.0.1     grid_4.4.1         fastmap_1.2.0      rprojroot_2.0.4   
## [13] jsonlite_1.8.9     Matrix_1.7-0       zip_2.3.1          survival_3.6-4    
## [17] purrr_1.0.2        fansi_1.0.6        scales_1.3.0       jquerylib_0.1.4   
## [21] cli_3.6.3          rlang_1.1.4        munsell_0.5.1      splines_4.4.1     
## [25] withr_3.0.1        cachem_1.1.0       yaml_2.3.10        tools_4.4.1       
## [29] SparseM_1.84-2     polynom_1.4-1      MatrixModels_0.5-3 colorspace_2.1-1  
## [33] gridGraphics_0.5-1 vctrs_0.6.5        R6_2.5.1           lifecycle_1.0.4   
## [37] fs_1.6.5           MASS_7.3-60.2      pkgconfig_2.0.3    pillar_1.9.0      
## [41] bslib_0.8.0        gtable_0.3.5       glue_1.8.0         Rcpp_1.0.13       
## [45] xfun_0.49          tidyselect_1.2.1   rstudioapi_0.17.1  farver_2.1.2      
## [49] htmltools_0.5.8.1  labeling_0.4.3     rmarkdown_2.29     compiler_4.4.1    
## [53] quantreg_5.99.1